library(readr)

Untreated <- readRDS(paste0(wd,"/data/NCI_TPW_gep_untreated.rds"))
Treated <- readRDS(paste0(wd,"/data/NCI_TPW_gep_treated.rds"))
Metadata = read.table(paste0(wd,"/data/NCI_TPW_metadata.tsv"), header = TRUE, sep ="\t", stringsAsFactors = TRUE)
Cellline_annotation = read.table(paste0(wd,"/data/cellline_annotation.tsv"), header = TRUE, sep ="\t", stringsAsFactors = TRUE)
Drug_annotation = read.table(paste0(wd,"/data/drug_annotation.tsv"), header = TRUE, sep ="\t", stringsAsFactors = TRUE)


Treated <- as.data.frame(Treated)
Untreated <- as.data.frame(Untreated)
#Find cell lines, which belong to vorinostat:
#Untreated matrix:
UntreatedVorinostatcolumns <- grep(pattern = "vorinostat",colnames(Untreated))
UntreatedVorinostatcolumns
##  [1] 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777
## [18] 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794
## [35] 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811
## [52] 812 813 814 815 816 817 818 819
#-> seems like column 761 - 819 belongs to vorinostat. Check if that is true:
identical(UntreatedVorinostatcolumns, 761:819)
## [1] TRUE
#-> TRUE

#Same with treated matrix:
TreatedVorinostatcolumns <- grep(pattern = "vorinostat",colnames(Treated))
TreatedVorinostatcolumns
##  [1] 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777
## [18] 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794
## [35] 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811
## [52] 812 813 814 815 816 817 818 819
identical(TreatedVorinostatcolumns, 761:819)
## [1] TRUE
#Define Vorinostat-data: 
UntreatedVorinostat <- Untreated[,UntreatedVorinostatcolumns]
TreatedVorinostat <- Treated[,TreatedVorinostatcolumns]

#fold change matrix 
FC <- TreatedVorinostat - UntreatedVorinostat

#Create a common matrix for treated and untreated vorinostat data:
 VorinostatTotal <- cbind(UntreatedVorinostat, TreatedVorinostat)
  


  
    # variable um Zahlen zu ersetzen 
 col_untreated = grep ('_0nM',colnames(VorinostatTotal))
 col_untreated
##  [1]  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
## [24] 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46
## [47] 47 48 49 50 51 52 53 54 55 56 57 58 59
 col_treated = grep ('_5000nM',colnames(VorinostatTotal))
 col_treated
##  [1]  60  61  62  63  64  65  66  67  68  69  70  71  72  73  74  75  76
## [18]  77  78  79  80  81  82  83  84  85  86  87  88  89  90  91  92  93
## [35]  94  95  96  97  98  99 100 101 102 103 104 105 106 107 108 109 110
## [52] 111 112 113 114 115 116 117 118
library(clusterProfiler)
## 
## clusterProfiler v3.10.1  For help: https://guangchuangyu.github.io/software/clusterProfiler
## 
## If you use clusterProfiler in published research, please cite:
## Guangchuang Yu, Li-Gen Wang, Yanyan Han, Qing-Yu He. clusterProfiler: an R package for comparing biological themes among gene clusters. OMICS: A Journal of Integrative Biology. 2012, 16(5):284-287.
###################################################################################################
###################################################################################################

############################## creat a list of genes/biomarkers ###################################

# pValues for biomarkers
pValues <- apply(VorinostatTotal, 1, function(x) t.test(x[col_untreated],x[col_treated],paired = TRUE, alternative = "two.sided")$p.value)

# sort the p.values 
sortedpValues <- sort(pValues, decreasing = FALSE)
sortedpValues <- as.matrix(sortedpValues)

# take the first 100 p.values for biomarkers 
biomarkers <- sortedpValues[1:100,]
biomarkers <- as.matrix(biomarkers)

# creat vector with genes 
biomarkers.genes = row.names(biomarkers)
biomarkers.genes
##   [1] "MAP1LC3B"        "TOE1"            "ATG101"         
##   [4] "PI4K2A"          "MEPCE"           "LANCL2"         
##   [7] "PMF1"            "RCC1L"           "ZNF277"         
##  [10] "ANP32B"          "GABARAPL2"       "SPAG9"          
##  [13] "SLC25A44"        "PPRC1"           "SCMH1"          
##  [16] "OSER1"           "NME6"            "KIAA0368"       
##  [19] "HDGF"            "HCFC1"           "U2AF2"          
##  [22] "UBTF"            "GMEB2"           "PHF2"           
##  [25] "SEPHS1"          "MRPL9"           "CERS2"          
##  [28] "CTCF"            "COIL"            "DPM1"           
##  [31] "ZFX"             "CYB5R1"          "HDAC3"          
##  [34] "ZMYND11"         "ASB6"            "ARL8B"          
##  [37] "AFTPH"           "NSMAF"           "SPAG7"          
##  [40] "TOX4"            "TUSC2"           "SLC5A6"         
##  [43] "DHX35"           "SUMO3"           "PSMB5"          
##  [46] "SMARCB1"         "AVEN"            "ARFGAP2"        
##  [49] "TAF5L"           "LSM7"            "UFC1"           
##  [52] "ATG4A"           "RBM39"           "PUF60"          
##  [55] "RBM15B"          "GABPB1"          "PTPN9"          
##  [58] "PATZ1"           "AAMDC"           "SIAH1"          
##  [61] "BRD1"            "ATP6V1D"         "TOR1A"          
##  [64] "MICU1"           "VPS52"           "CGRRF1"         
##  [67] "CLEC16A"         "FAIM"            "SENP6"          
##  [70] "BID"             "ABHD17A"         "TACO1"          
##  [73] "MRPL11"          "RNH1"            "RBX1"           
##  [76] "RNF114"          "LRRC41"          "PEF1"           
##  [79] "DDX54"           "PPP1R14B"        "PTPN1"          
##  [82] "ZFR"             "HABP4"           "SUV39H1"        
##  [85] "FAM57A"          "RAN"             "MRPS7"          
##  [88] "KPNB1"           "TXNDC15"         "SF3B3"          
##  [91] "CCDC28A"         "MIR6884///MED24" "SNX5"           
##  [94] "GEMIN4"          "IPO4"            "TRMT1"          
##  [97] "CETN2"           "RCN1"            "INPP5K"         
## [100] "ZNF394"
###################################################################################################
###################################################################################################

############################ translating amog diffrent gene ID types ##############################

# install needed libary form translation 
'if (!requireNamespace("BiocManager", quietly = TRUE))
  install.packages("BiocManager")

BiocManager::install("org.Hs.eg.db")'
## [1] "if (!requireNamespace(\"BiocManager\", quietly = TRUE))\n  install.packages(\"BiocManager\")\n\nBiocManager::install(\"org.Hs.eg.db\")"
library(org.Hs.eg.db)
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     Filter, Find, Map, Position, Reduce, anyDuplicated, append,
##     as.data.frame, basename, cbind, colMeans, colSums, colnames,
##     dirname, do.call, duplicated, eval, evalq, get, grep, grepl,
##     intersect, is.unsorted, lapply, lengths, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, rank, rbind,
##     rowMeans, rowSums, rownames, sapply, setdiff, sort, table,
##     tapply, union, unique, unsplit, which, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:base':
## 
##     expand.grid
## 
# do the translation 
translated.genes= bitr(biomarkers.genes,fromType="SYMBOL", toType="ENTREZID",OrgDb = org.Hs.eg.db) 
## 'select()' returned 1:1 mapping between keys and columns
## Warning in bitr(biomarkers.genes, fromType = "SYMBOL", toType =
## "ENTREZID", : 2% of input gene IDs are fail to map...
head(translated.genes)
##     SYMBOL ENTREZID
## 1 MAP1LC3B    81631
## 2     TOE1   114034
## 3   ATG101    60673
## 4   PI4K2A    55361
## 5    MEPCE    56257
## 6   LANCL2    55915
# in which types the gene names can be translated by this libary 
idType("org.Hs.eg.db")
##  [1] "ACCNUM"       "ALIAS"        "ENSEMBL"      "ENSEMBLPROT" 
##  [5] "ENSEMBLTRANS" "ENTREZID"     "ENZYME"       "EVIDENCE"    
##  [9] "EVIDENCEALL"  "GENENAME"     "GO"           "GOALL"       
## [13] "IPI"          "MAP"          "OMIM"         "ONTOLOGY"    
## [17] "ONTOLOGYALL"  "PATH"         "PFAM"         "PMID"        
## [21] "PROSITE"      "REFSEQ"       "SYMBOL"       "UCSCKG"      
## [25] "UNIGENE"      "UNIPROT"
###################################################################################################

################################### Gene Ontology Classification ##################################

# install needed libary 
'if (!requireNamespace("BiocManager", quietly = TRUE))
  install.packages("BiocManager")

BiocManager::install("DOSE")'
## [1] "if (!requireNamespace(\"BiocManager\", quietly = TRUE))\n  install.packages(\"BiocManager\")\n\nBiocManager::install(\"DOSE\")"
library(DOSE)
## Warning: package 'DOSE' was built under R version 3.5.2
## DOSE v3.8.2  For help: https://guangchuangyu.github.io/DOSE
## 
## If you use DOSE in published research, please cite:
## Guangchuang Yu, Li-Gen Wang, Guang-Rong Yan, Qing-Yu He. DOSE: an R/Bioconductor package for Disease Ontology Semantic and Enrichment analysis. Bioinformatics 2015, 31(4):608-609
data(biomarkers)
## Warning in data(biomarkers): data set 'biomarkers' not found
gene=translated.genes$ENTREZID
head(gene)
## [1] "81631"  "114034" "60673"  "55361"  "56257"  "55915"
###################################################################################################
# Notes: BP for Biological Process, MF for Molecular Function, and CC for Cellular Component

#### do the Gene Ontology Classification

### Biological Process
ggo <- groupGO(gene= gene, OrgDb = org.Hs.eg.db,  ont = "BP", level = 3, readable = FALSE)
ggo.data=as.data.frame(ggo)               
head(summary(ggo.data))
##           ID     
##  GO:0000075:  1  
##  GO:0000728:  1  
##  GO:0000734:  1  
##  GO:0000742:  1  
##  GO:0000743:  1  
##  GO:0000920:  1  
##                                                                     Description 
##  B cell selection                                                         :  1  
##  DNA clamp unloading                                                      :  1  
##  DNA recombinase assembly involved in gene conversion at mating-type locus:  1  
##  FtsZ-dependent cytokinesis                                               :  1  
##  RNA folding                                                              :  1  
##  Sertoli cell proliferation                                               :  1  
##      Count          GeneRatio  
##  Min.   : 0.000   0/98   :426  
##  1st Qu.: 0.000   1/98   : 34  
##  Median : 0.000   2/98   : 16  
##  Mean   : 2.636   5/98   : 12  
##  3rd Qu.: 0.000   3/98   : 10  
##  Max.   :79.000   7/98   :  8  
##                                    geneID   
##                                       :426  
##  22955/7543/23598/6477/55905/5901/1069:  5  
##  7543                                 :  5  
##  11334                                :  4  
##  10201                                :  3  
##  1861                                 :  3
#plot (ggo.data) --> ugly plot with no usable informations 
# visualization 
barplot(ggo, drop=TRUE, showCategory=12)

### Cellular Component
ggo2 <- groupGO(gene= gene, OrgDb = org.Hs.eg.db,  ont = "CC", level = 3, readable = FALSE)
barplot(ggo2, drop=TRUE, showCategory=12)

### Molecular Function
ggo3 <- groupGO(gene= gene, OrgDb = org.Hs.eg.db,  ont = "MF", level = 3, readable = FALSE)
barplot(ggo3, drop=TRUE, showCategory=12)

########################################## enrich GO ########################################################

# only works with gene id ENSEMBL 

library(org.Hs.eg.db)

# create gene data frame ans translate it 

gene2 <- row.names(biomarkers)
gene.df <- bitr(gene2, fromType = "SYMBOL",
                toType = c("ENSEMBL", "ENTREZID"),
                OrgDb = org.Hs.eg.db)
## 'select()' returned 1:many mapping between keys and columns
## Warning in bitr(gene2, fromType = "SYMBOL", toType = c("ENSEMBL",
## "ENTREZID"), : 2% of input gene IDs are fail to map...
##############################################################################################################
### Cellular Component
ego1 <- enrichGO(gene         = gene.df$ENSEMBL,
                 OrgDb         = org.Hs.eg.db,
                 keyType       = 'ENSEMBL',
                 ont           = "CC",
                 pAdjustMethod = "BH",
                 pvalueCutoff  = 0.01,
                 qvalueCutoff  = 0.05)

head(summary(ego1))
## Warning in summary(ego1): summary method to convert the object to
## data.frame is deprecated, please use as.data.frame instead.
##                    ID                  Description GeneRatio   BgRatio
## GO:0000938 GO:0000938                 GARP complex     7/105  11/21794
## GO:1990745 GO:1990745                 EARP complex     7/105  11/21794
## GO:0099023 GO:0099023            tethering complex     7/105  78/21794
## GO:0055037 GO:0055037           recycling endosome     9/105 227/21794
## GO:0005802 GO:0005802          trans-Golgi network    10/105 295/21794
## GO:0032588 GO:0032588 trans-Golgi network membrane     7/105 155/21794
##                  pvalue     p.adjust       qvalue
## GO:0000938 1.597129e-14 2.220009e-12 1.882931e-12
## GO:1990745 1.597129e-14 2.220009e-12 1.882931e-12
## GO:0099023 9.819549e-08 9.099449e-06 7.717821e-06
## GO:0055037 1.555296e-06 9.158570e-05 7.767966e-05
## GO:0005802 1.647225e-06 9.158570e-05 7.767966e-05
## GO:0032588 1.020634e-05 4.728939e-04 4.010913e-04
##                                                                                                                                                                     geneID
## GO:0000938                                                 ENSG00000223501/ENSG00000224455/ENSG00000228425/ENSG00000223618/ENSG00000206286/ENSG00000225590/ENSG00000236014
## GO:1990745                                                 ENSG00000223501/ENSG00000224455/ENSG00000228425/ENSG00000223618/ENSG00000206286/ENSG00000225590/ENSG00000236014
## GO:0099023                                                 ENSG00000223501/ENSG00000224455/ENSG00000228425/ENSG00000223618/ENSG00000206286/ENSG00000225590/ENSG00000236014
## GO:0055037                 ENSG00000223501/ENSG00000224455/ENSG00000228425/ENSG00000223618/ENSG00000206286/ENSG00000225590/ENSG00000236014/ENSG00000129968/ENSG00000132341
## GO:0005802 ENSG00000155252/ENSG00000149182/ENSG00000223501/ENSG00000224455/ENSG00000228425/ENSG00000223618/ENSG00000206286/ENSG00000225590/ENSG00000236014/ENSG00000132376
## GO:0032588                                                 ENSG00000223501/ENSG00000224455/ENSG00000228425/ENSG00000223618/ENSG00000206286/ENSG00000225590/ENSG00000236014
##            Count
## GO:0000938     7
## GO:1990745     7
## GO:0099023     7
## GO:0055037     9
## GO:0005802    10
## GO:0032588     7
# visualization 
dotplot(ego1, showCategory=3)

cnetplot(ego1)

emapplot(ego1)

##############################################################################################################

### Biological Process
ego2 <- enrichGO(gene         = gene.df$ENSEMBL,
                 OrgDb         = org.Hs.eg.db,
                 keyType       = 'ENSEMBL',
                 ont           = "BP",
                 pAdjustMethod = "BH",
                 pvalueCutoff  = 0.01,
                 qvalueCutoff  = 0.05)

head(summary(ego2))
## Warning in summary(ego2): summary method to convert the object to
## data.frame is deprecated, please use as.data.frame instead.
##                    ID                             Description GeneRatio
## GO:0010668 GO:0010668         ectodermal cell differentiation     7/104
## GO:0006896 GO:0006896              Golgi to vacuole transport     7/104
## GO:0007398 GO:0007398                    ectoderm development     7/104
## GO:0042147 GO:0042147 retrograde transport, endosome to Golgi     9/104
## GO:0032456 GO:0032456                     endocytic recycling     7/104
## GO:0048566 GO:0048566   embryonic digestive tract development     7/104
##             BgRatio       pvalue     p.adjust       qvalue
## GO:0010668 14/20505 2.344304e-13 2.944446e-10 2.591073e-10
## GO:0006896 20/20505 5.165006e-12 3.243624e-09 2.854346e-09
## GO:0007398 30/20505 1.301306e-10 5.448136e-08 4.794287e-08
## GO:0042147 97/20505 1.541801e-09 4.841256e-07 4.260240e-07
## GO:0032456 43/20505 1.951711e-09 4.902698e-07 4.314309e-07
## GO:0048566 49/20505 5.074913e-09 1.062349e-06 9.348524e-07
##                                                                                                                                                     geneID
## GO:0010668                                 ENSG00000223501/ENSG00000224455/ENSG00000228425/ENSG00000223618/ENSG00000206286/ENSG00000225590/ENSG00000236014
## GO:0006896                                 ENSG00000223501/ENSG00000224455/ENSG00000228425/ENSG00000223618/ENSG00000206286/ENSG00000225590/ENSG00000236014
## GO:0007398                                 ENSG00000223501/ENSG00000224455/ENSG00000228425/ENSG00000223618/ENSG00000206286/ENSG00000225590/ENSG00000236014
## GO:0042147 ENSG00000008294/ENSG00000223501/ENSG00000224455/ENSG00000228425/ENSG00000223618/ENSG00000206286/ENSG00000225590/ENSG00000236014/ENSG00000089006
## GO:0032456                                 ENSG00000223501/ENSG00000224455/ENSG00000228425/ENSG00000223618/ENSG00000206286/ENSG00000225590/ENSG00000236014
## GO:0048566                                 ENSG00000223501/ENSG00000224455/ENSG00000228425/ENSG00000223618/ENSG00000206286/ENSG00000225590/ENSG00000236014
##            Count
## GO:0010668     7
## GO:0006896     7
## GO:0007398     7
## GO:0042147     9
## GO:0032456     7
## GO:0048566     7
# visualization 
dotplot(ego2, showCategory=10)

cnetplot(ego2)

emapplot(ego2)

##############################################################################################################

### Molecular Function
ego3 <- enrichGO(gene         = gene.df$ENSEMBL,
                 OrgDb         = org.Hs.eg.db,
                 keyType       = 'ENSEMBL',
                 ont           = "MF",
                 pAdjustMethod = "BH",
                 pvalueCutoff  = 0.01,
                 qvalueCutoff  = 0.05)

head(summary(ego3))
## Warning in summary(ego3): summary method to convert the object to
## data.frame is deprecated, please use as.data.frame instead.
##                    ID
## GO:0000149 GO:0000149
## GO:0019905 GO:0019905
## GO:0001013 GO:0001013
## GO:0001163 GO:0001163
## GO:0001164 GO:0001164
## GO:0003713 GO:0003713
##                                                                 Description
## GO:0000149                                                    SNARE binding
## GO:0019905                                                 syntaxin binding
## GO:0001013                   RNA polymerase I regulatory region DNA binding
## GO:0001163 RNA polymerase I regulatory region sequence-specific DNA binding
## GO:0001164      RNA polymerase I CORE element sequence-specific DNA binding
## GO:0003713                               transcription coactivator activity
##            GeneRatio   BgRatio       pvalue     p.adjust       qvalue
## GO:0000149     8/106 113/19626 1.774625e-07 3.572971e-05 3.355989e-05
## GO:0019905     7/106  81/19626 2.748439e-07 3.572971e-05 3.355989e-05
## GO:0001013     3/106  10/19626 1.787711e-05 8.403651e-04 7.893308e-04
## GO:0001163     3/106  10/19626 1.787711e-05 8.403651e-04 7.893308e-04
## GO:0001164     3/106  10/19626 1.787711e-05 8.403651e-04 7.893308e-04
## GO:0003713    10/106 352/19626 2.100264e-05 8.403651e-04 7.893308e-04
##                                                                                                                                                                     geneID
## GO:0000149                                 ENSG00000034713/ENSG00000223501/ENSG00000224455/ENSG00000228425/ENSG00000223618/ENSG00000206286/ENSG00000225590/ENSG00000236014
## GO:0019905                                                 ENSG00000223501/ENSG00000224455/ENSG00000228425/ENSG00000223618/ENSG00000206286/ENSG00000225590/ENSG00000236014
## GO:0001013                                                                                                                 ENSG00000108312/ENSG00000099956/ENSG00000275837
## GO:0001163                                                                                                                 ENSG00000108312/ENSG00000099956/ENSG00000275837
## GO:0001164                                                                                                                 ENSG00000108312/ENSG00000099956/ENSG00000275837
## GO:0003713 ENSG00000160783/ENSG00000148840/ENSG00000172534/ENSG00000101216/ENSG00000197724/ENSG00000005889/ENSG00000099956/ENSG00000275837/ENSG00000135801/ENSG00000132341
##            Count
## GO:0000149     8
## GO:0019905     7
## GO:0001013     3
## GO:0001163     3
## GO:0001164     3
## GO:0003713    10
# visualization 
dotplot(ego3, showCategory=100)

cnetplot(ego3)

emapplot(ego3)

### creat FC data 

FC <- TreatedVorinostat - UntreatedVorinostat

# work with mean of the rows because we only want to compare the genes 
FC_meanrow= rowMeans(FC)

#################################################################################################################

### sort the data 

# work with absolute value to find the highest values
# because we want to have the most up and down regulated genes 
FC_abs= abs(FC_meanrow)

## sort the values to get the 100 largest values 

sortedFC_abs <- sort(FC_abs, decreasing = TRUE)
sortedFC_abs <- as.matrix(sortedFC_abs)

# take the first 100 for biomarkers 
biomarkers_FC = sortedFC_abs[1:100,]
biomarkers_FC <- as.matrix(biomarkers_FC)

# see that the last ones have very similar values 

# creat vector with gene names 
biomarkers_FC_genes= row.names(biomarkers_FC)


#################################################################################################################

### creat matrix with FC values (positive and negativ) 

# add the abs values to FC matrix 
FC_both= cbind(FC_meanrow,FC_abs)
FC_both=as.data.frame(FC_both)

# order this matrix 
FC_both_sorted <- FC_both[order(FC_both$FC_abs, decreasing = TRUE),]

# FC values of biomarkers
# take the first 100 of the sorted matrix, should be the same as un biomarkers_FC 
biomarkers_FC_values = FC_both_sorted[1:100,]
# remove the absolute values
biomarkers_FC_values <- subset( biomarkers_FC_values, select = -FC_abs)
biomarkers_FC_values = as.matrix(biomarkers_FC_values)
library(KEGG.db)
## 
## KEGG.db contains mappings based on older data because the original
##   resource was removed from the the public domain before the most
##   recent update was produced. This package should now be
##   considered deprecated and future versions of Bioconductor may
##   not have it available.  Users who want more current data are
##   encouraged to look at the KEGGREST or reactome.db packages
# see for other applications of this libary: 
# https://bioconductor.org/packages/release/data/annotation/manuals/KEGG.db/man/KEGG.db.pdf

###################################################################################################
# works better with biomarkers from FC
library(org.Hs.eg.db)
gene2 <- row.names(biomarkers_FC)
gene.df <- bitr(gene2, fromType = "SYMBOL",
                toType = c("ENSEMBL", "ENTREZID"),
                OrgDb = org.Hs.eg.db)
## 'select()' returned 1:many mapping between keys and columns
## Warning in bitr(gene2, fromType = "SYMBOL", toType = c("ENSEMBL",
## "ENTREZID"), : 10% of input gene IDs are fail to map...
kk <- enrichKEGG(gene=gene.df$ENTREZID,pvalueCutoff = 0.05)
head(summary(kk))
## Warning in summary(kk): summary method to convert the object to data.frame
## is deprecated, please use as.data.frame instead.
##                ID                             Description GeneRatio
## hsa05202 hsa05202 Transcriptional misregulation in cancer      7/46
## hsa04215 hsa04215            Apoptosis - multiple species      3/46
## hsa05203 hsa05203                    Viral carcinogenesis      6/46
## hsa04210 hsa04210                               Apoptosis      5/46
## hsa05166 hsa05166 Human T-cell leukemia virus 1 infection      6/46
##           BgRatio       pvalue   p.adjust     qvalue
## hsa05202 186/7866 9.055915e-05 0.01331219 0.01162970
## hsa04215  33/7866 9.032146e-04 0.04092195 0.03574993
## hsa05203 201/7866 1.032008e-03 0.04092195 0.03574993
## hsa04210 136/7866 1.113522e-03 0.04092195 0.03574993
## hsa05166 219/7866 1.605648e-03 0.04720605 0.04123980
##                                     geneID Count
## hsa05202 8091/4790/330/4291/4804/8900/1026     7
## hsa04215                      330/637/4804     3
## hsa05203     3017/4790/8349/8365/8900/1026     6
## hsa04210            4001/4790/2353/330/637     5
## hsa05166     4790/2353/8061/8829/8900/1026     6
# visualization
barplot(kk,showCategory=12)

dotplot(kk, showCategory=12)

cnetplot(kk,categorySize="geneNum")

emapplot(kk)

# problem: only finds 5 categories 
################################# Gene Ontology analysis with FC biomarkers ##########################################

# load biomarkers over FC 
# matrix we will work with: 
biomarkers_FC_genes= row.names(biomarkers_FC)
biomarkers_FC_values = as.matrix(biomarkers_FC_values)

# load the library 
library(clusterProfiler)

#################################################################################################################

### translate the gene names 

library(org.Hs.eg.db)

# do the translation 
# index 2 because already done analysis with biomarkers from p.values 

translated.genes2= bitr(biomarkers_FC_genes,fromType="SYMBOL", toType="ENTREZID",OrgDb = org.Hs.eg.db) 
## 'select()' returned 1:1 mapping between keys and columns
## Warning in bitr(biomarkers_FC_genes, fromType = "SYMBOL", toType =
## "ENTREZID", : 10% of input gene IDs are fail to map...
head(translated.genes2)
##     SYMBOL ENTREZID
## 1    DHRS2    10202
## 2     ABAT       18
## 3 SERPINI1     5274
## 6      CLU     1191
## 7      NMI     9111
## 8     STC1     6781
#################################################################################################################

#### Gene Ontology Classification
# Notes: BP for Biological Process, MF for Molecular Function, and CC for Cellular Component

library(DOSE)

# take the needed gene ID
gene2=translated.genes2$ENTREZID
head(gene2)
## [1] "10202" "18"    "5274"  "1191"  "9111"  "6781"
### do classification

## 1. Biological Process
ggo2.1 <- groupGO(gene= gene2, OrgDb = org.Hs.eg.db,  ont = "BP", level = 3, readable = FALSE)
head(summary(ggo2.1))
## Warning in summary(ggo2.1): summary method to convert the object to
## data.frame is deprecated, please use as.data.frame instead.
##                    ID                              Description Count
## GO:0019953 GO:0019953                      sexual reproduction     4
## GO:0019954 GO:0019954                     asexual reproduction     0
## GO:0022414 GO:0022414                     reproductive process     8
## GO:0032504 GO:0032504      multicellular organism reproduction     4
## GO:0032505 GO:0032505 reproduction of a single-celled organism     0
## GO:0061887 GO:0061887         reproduction of symbiont in host     0
##            GeneRatio                               geneID
## GO:0019953      4/90                     18/8061/330/8900
## GO:0019954      0/90                                     
## GO:0022414      8/90 18/6781/374/2353/10370/8061/330/8900
## GO:0032504      4/90                   6781/8061/330/8900
## GO:0032505      0/90                                     
## GO:0061887      0/90
# visualization 
barplot(ggo2.1, drop=TRUE, showCategory=12)

## 2. Molecular Function
ggo2.2 <- groupGO(gene= gene2, OrgDb = org.Hs.eg.db,  ont = "MF", level = 3, readable = FALSE)
head(summary(ggo2.2))
## Warning in summary(ggo2.2): summary method to convert the object to
## data.frame is deprecated, please use as.data.frame instead.
##                    ID
## GO:0001070 GO:0001070
## GO:0001072 GO:0001072
## GO:0001073 GO:0001073
## GO:0001134 GO:0001134
## GO:0003700 GO:0003700
## GO:0003711 GO:0003711
##                                                           Description
## GO:0001070               RNA-binding transcription regulator activity
## GO:0001072 transcription antitermination factor activity, RNA binding
## GO:0001073 transcription antitermination factor activity, DNA binding
## GO:0001134                transcription regulator recruiting activity
## GO:0003700                  DNA-binding transcription factor activity
## GO:0003711                transcription elongation regulator activity
##            Count GeneRatio                                        geneID
## GO:0001070     0      0/90                                              
## GO:0001072     0      0/90                                              
## GO:0001073     0      0/90                                              
## GO:0001134     0      0/90                                              
## GO:0003700     9      9/90 8091/4790/2353/10370/8061/2305/2969/23635/467
## GO:0003711     0      0/90
# visualization 
barplot(ggo2.2, drop=TRUE, showCategory=12)

## 3. Cellular Component
ggo2.3 <- groupGO(gene= gene2, OrgDb = org.Hs.eg.db,  ont = "CC", level = 3, readable = FALSE)
head(summary(ggo2.3))
## Warning in summary(ggo2.3): summary method to convert the object to
## data.frame is deprecated, please use as.data.frame instead.
##                    ID                    Description Count GeneRatio
## GO:0005886 GO:0005886                plasma membrane    25     25/90
## GO:0005628 GO:0005628              prospore membrane     0      0/90
## GO:0005789 GO:0005789 endoplasmic reticulum membrane     4      4/90
## GO:0019867 GO:0019867                 outer membrane     1      1/90
## GO:0031090 GO:0031090             organelle membrane    17     17/90
## GO:0034357 GO:0034357        photosynthetic membrane     0      0/90
##                                                                                                                                          geneID
## GO:0005886 6781/57030/8744/55256/4758/79850/360/5168/23208/7205/11151/80328/8061/4900/1490/10486/8829/55915/6515/1368/27340/6253/27131/4804/306
## GO:0005628                                                                                                                                     
## GO:0005789                                                                                                                   374/9526/6253/2281
## GO:0019867                                                                                                                                  637
## GO:0031090                                             1191/374/7298/57030/4758/4001/23208/51131/11151/4900/57134/6515/64921/27131/637/2281/306
## GO:0034357
# visualization 
barplot(ggo2.3, drop=TRUE, showCategory=12)

#################################################################################################################

### enrich GO 

# only works with gene id ENSEMBL 

library(org.Hs.eg.db)

# create gene data frame ans translate it 

gene2.2 <- row.names(biomarkers_FC_values)
gene.df2 <- bitr(gene2.2, fromType = "SYMBOL",
                toType = c("ENSEMBL", "ENTREZID"),
                OrgDb = org.Hs.eg.db)
## 'select()' returned 1:many mapping between keys and columns
## Warning in bitr(gene2.2, fromType = "SYMBOL", toType = c("ENSEMBL",
## "ENTREZID"), : 10% of input gene IDs are fail to map...
##############################################################################################################
### Cellular Component
ego2.1 <- enrichGO(gene         = gene.df2$ENSEMBL,
                 OrgDb         = org.Hs.eg.db,
                 keyType       = 'ENSEMBL',
                 ont           = "CC",
                 pAdjustMethod = "BH",
                 pvalueCutoff  = 0.01,
                 qvalueCutoff  = 0.05)

head(summary(ego2.1))
## Warning in summary(ego2.1): summary method to convert the object to
## data.frame is deprecated, please use as.data.frame instead.
##                    ID               Description GeneRatio   BgRatio
## GO:0035580 GO:0035580    specific granule lumen      9/98  83/21794
## GO:0042581 GO:0042581          specific granule     11/98 213/21794
## GO:0043202 GO:0043202           lysosomal lumen      8/98 111/21794
## GO:0034774 GO:0034774   secretory granule lumen     12/98 374/21794
## GO:0060205 GO:0060205 cytoplasmic vesicle lumen     12/98 392/21794
## GO:0031983 GO:0031983             vesicle lumen     12/98 393/21794
##                  pvalue     p.adjust       qvalue
## GO:0035580 1.290481e-10 2.864867e-08 2.445121e-08
## GO:0042581 3.196320e-09 3.547915e-07 3.028092e-07
## GO:0043202 3.773459e-08 2.792360e-06 2.383237e-06
## GO:0034774 1.180312e-07 6.550730e-06 5.590950e-06
## GO:0060205 1.958048e-07 7.445533e-06 6.354652e-06
## GO:0031983 2.012306e-07 7.445533e-06 6.354652e-06
##                                                                                                                                                                                                     geneID
## GO:0035580                                                 ENSG00000204386/ENSG00000227315/ENSG00000234343/ENSG00000234846/ENSG00000227129/ENSG00000184494/ENSG00000228691/ENSG00000223957/ENSG00000109320
## GO:0042581                 ENSG00000204386/ENSG00000227315/ENSG00000234343/ENSG00000234846/ENSG00000227129/ENSG00000184494/ENSG00000228691/ENSG00000223957/ENSG00000109320/ENSG00000059804/ENSG00000138772
## GO:0043202                                                                 ENSG00000204386/ENSG00000227315/ENSG00000234343/ENSG00000234846/ENSG00000227129/ENSG00000184494/ENSG00000228691/ENSG00000223957
## GO:0034774 ENSG00000163536/ENSG00000120885/ENSG00000103196/ENSG00000204386/ENSG00000227315/ENSG00000234343/ENSG00000234846/ENSG00000227129/ENSG00000184494/ENSG00000228691/ENSG00000223957/ENSG00000109320
## GO:0060205 ENSG00000163536/ENSG00000120885/ENSG00000103196/ENSG00000204386/ENSG00000227315/ENSG00000234343/ENSG00000234846/ENSG00000227129/ENSG00000184494/ENSG00000228691/ENSG00000223957/ENSG00000109320
## GO:0031983 ENSG00000163536/ENSG00000120885/ENSG00000103196/ENSG00000204386/ENSG00000227315/ENSG00000234343/ENSG00000234846/ENSG00000227129/ENSG00000184494/ENSG00000228691/ENSG00000223957/ENSG00000109320
##            Count
## GO:0035580     9
## GO:0042581    11
## GO:0043202     8
## GO:0034774    12
## GO:0060205    12
## GO:0031983    12
# visualization 
dotplot(ego2.1, showCategory=3)

cnetplot(ego2.1)

emapplot(ego2.1)

##############################################################################################################

### Biological Process
ego2.2 <- enrichGO(gene         = gene.df2$ENSEMBL,
                 OrgDb         = org.Hs.eg.db,
                 keyType       = 'ENSEMBL',
                 ont           = "BP",
                 pAdjustMethod = "BH",
                 pvalueCutoff  = 0.01,
                 qvalueCutoff  = 0.05)

head(summary(ego2.2))
## Warning in summary(ego2.2): summary method to convert the object to
## data.frame is deprecated, please use as.data.frame instead.
##                    ID                         Description GeneRatio
## GO:0006689 GO:0006689       ganglioside catabolic process      8/95
## GO:0009313 GO:0009313   oligosaccharide catabolic process      8/95
## GO:0046479 GO:0046479 glycosphingolipid catabolic process      8/95
## GO:0019377 GO:0019377        glycolipid catabolic process      8/95
## GO:0046514 GO:0046514          ceramide catabolic process      8/95
## GO:0001573 GO:0001573       ganglioside metabolic process      8/95
##             BgRatio       pvalue     p.adjust       qvalue
## GO:0006689 14/20505 4.609982e-16 9.385924e-13 8.477514e-13
## GO:0009313 22/20505 4.762652e-14 3.232253e-11 2.919422e-11
## GO:0046479 22/20505 4.762652e-14 3.232253e-11 2.919422e-11
## GO:0019377 24/20505 1.087160e-13 5.533646e-11 4.998076e-11
## GO:0046514 25/20505 1.592733e-13 6.485610e-11 5.857906e-11
## GO:0001573 30/20505 8.457689e-13 2.869976e-10 2.592208e-10
##                                                                                                                                     geneID
## GO:0006689 ENSG00000204386/ENSG00000227315/ENSG00000234343/ENSG00000234846/ENSG00000227129/ENSG00000184494/ENSG00000228691/ENSG00000223957
## GO:0009313 ENSG00000204386/ENSG00000227315/ENSG00000234343/ENSG00000234846/ENSG00000227129/ENSG00000184494/ENSG00000228691/ENSG00000223957
## GO:0046479 ENSG00000204386/ENSG00000227315/ENSG00000234343/ENSG00000234846/ENSG00000227129/ENSG00000184494/ENSG00000228691/ENSG00000223957
## GO:0019377 ENSG00000204386/ENSG00000227315/ENSG00000234343/ENSG00000234846/ENSG00000227129/ENSG00000184494/ENSG00000228691/ENSG00000223957
## GO:0046514 ENSG00000204386/ENSG00000227315/ENSG00000234343/ENSG00000234846/ENSG00000227129/ENSG00000184494/ENSG00000228691/ENSG00000223957
## GO:0001573 ENSG00000204386/ENSG00000227315/ENSG00000234343/ENSG00000234846/ENSG00000227129/ENSG00000184494/ENSG00000228691/ENSG00000223957
##            Count
## GO:0006689     8
## GO:0009313     8
## GO:0046479     8
## GO:0019377     8
## GO:0046514     8
## GO:0001573     8
# visualization 
dotplot(ego2.2, showCategory=10)

cnetplot(ego2.2)

emapplot(ego2.2)

##############################################################################################################

### Molecular Function
ego2.3 <- enrichGO(gene         = gene.df2$ENSEMBL,
                 OrgDb         = org.Hs.eg.db,
                 keyType       = 'ENSEMBL',
                 ont           = "MF",
                 pAdjustMethod = "BH",
                 pvalueCutoff  = 0.01,
                 qvalueCutoff  = 0.05)

head(summary(ego2.3))
## Warning in summary(ego2.3): summary method to convert the object to
## data.frame is deprecated, please use as.data.frame instead.
##                    ID                                          Description
## GO:0052794 GO:0052794                  exo-alpha-(2->3)-sialidase activity
## GO:0052795 GO:0052795                  exo-alpha-(2->6)-sialidase activity
## GO:0052796 GO:0052796                  exo-alpha-(2->8)-sialidase activity
## GO:0004308 GO:0004308                         exo-alpha-sialidase activity
## GO:0016997 GO:0016997                             alpha-sialidase activity
## GO:0004553 GO:0004553 hydrolase activity, hydrolyzing O-glycosyl compounds
##            GeneRatio   BgRatio       pvalue     p.adjust       qvalue
## GO:0052794      8/96  12/19626 1.184943e-16 1.228391e-14 1.130893e-14
## GO:0052795      8/96  12/19626 1.184943e-16 1.228391e-14 1.130893e-14
## GO:0052796      8/96  12/19626 1.184943e-16 1.228391e-14 1.130893e-14
## GO:0004308      8/96  14/19626 7.131444e-16 4.435758e-14 4.083690e-14
## GO:0016997      8/96  14/19626 7.131444e-16 4.435758e-14 4.083690e-14
## GO:0004553      9/96 105/19626 2.233203e-09 1.157544e-07 1.065669e-07
##                                                                                                                                                     geneID
## GO:0052794                 ENSG00000204386/ENSG00000227315/ENSG00000234343/ENSG00000234846/ENSG00000227129/ENSG00000184494/ENSG00000228691/ENSG00000223957
## GO:0052795                 ENSG00000204386/ENSG00000227315/ENSG00000234343/ENSG00000234846/ENSG00000227129/ENSG00000184494/ENSG00000228691/ENSG00000223957
## GO:0052796                 ENSG00000204386/ENSG00000227315/ENSG00000234343/ENSG00000234846/ENSG00000227129/ENSG00000184494/ENSG00000228691/ENSG00000223957
## GO:0004308                 ENSG00000204386/ENSG00000227315/ENSG00000234343/ENSG00000234846/ENSG00000227129/ENSG00000184494/ENSG00000228691/ENSG00000223957
## GO:0016997                 ENSG00000204386/ENSG00000227315/ENSG00000234343/ENSG00000234846/ENSG00000227129/ENSG00000184494/ENSG00000228691/ENSG00000223957
## GO:0004553 ENSG00000204386/ENSG00000227315/ENSG00000234343/ENSG00000234846/ENSG00000227129/ENSG00000184494/ENSG00000228691/ENSG00000223957/ENSG00000117643
##            Count
## GO:0052794     8
## GO:0052795     8
## GO:0052796     8
## GO:0004308     8
## GO:0016997     8
## GO:0004553     9
# visualization 
dotplot(ego2.3, showCategory=10)

cnetplot(ego2.3)

emapplot(ego2.3)